# -*- coding: utf-8 -*-
"""
Created on Mon Feb 19 13:30:05 2024

@author: yys
"""

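# Calculation script: spline-resamples each input Excel profile and averages
# it into 5-unit GPH bins, writing one CSV per input file.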


import numpy as np
import pandas as pd
import scipy.interpolate as spi
import os


def get_file_names_in_directory(directory_path):
    """Return the names of the files located directly in ``directory_path``.

    Only entries for which ``os.path.isfile`` is true are kept, so
    sub-directories are skipped and never descended into.  The returned
    names carry no directory prefix and appear in the order produced by
    ``os.listdir``.
    """
    return [
        entry
        for entry in os.listdir(directory_path)
        if os.path.isfile(os.path.join(directory_path, entry))
    ]




# ---------------------------------------------------------------------------
# Batch job: for every Excel file in ``file_path_in`` build a profile that is
# spline-resampled along the row index, averaged per integer GPH value and
# then averaged again in HEIGHT_BIN-wide GPH bins, and write it to
# ``<file_path_out>\5m\<year>\<name>_5m.csv``.
# ---------------------------------------------------------------------------

# Directory holding the input Excel workbooks (one profile per file).
file_path_in = "J:\\BJ TK\\bj2018_4_2019_10tk\\tk_xlsx\\"
# Root directory of the output tree.
file_path_out = "J:\\BJ TK"
# Names of all files found directly in the input directory.
file_names_list = get_file_names_in_directory(file_path_in)

# Sentinel used in the input data for "no value"; replaced by NaN.
MISSING_VALUE = 999999
# Degree of the interpolating spline (3 = cubic); splrep needs more than
# SPLINE_DEGREE data points.
SPLINE_DEGREE = 3
# Number of spline evaluation points per original row interval (step 0.01).
SAMPLES_PER_LEVEL = 100
# Width of the final GPH bins.
HEIGHT_BIN = 5
# Offset removed before binning and added back afterwards, so bin centres are
# HEIGHT_OFFSET + k * HEIGHT_BIN.  NOTE(review): presumably the station
# altitude -- confirm.
HEIGHT_OFFSET = 31
# Columns of the input sheet that are not used by this script.
DROPPED_COLUMNS = ["Lat", "Lon", "Alti", "ReFactor", "Lat_Dev", "Lon_Dev",
                   "Time_Dev_WQ", "Elev", "Bear", "DIST"]
# Columns rounded to two decimals before the spline fit.
ROUNDED_COLUMNS = ['wind_speed_x', 'PRS_HWC', 'TEM', 'wind_speed_y', 'RHU']
# Final column names, applied by POSITION to the result table.
# NOTE(review): this relies on the input column order being GPH, temperature,
# pressure, humidity; a different order would mislabel columns -- confirm
# against the input files.
OUTPUT_COLUMNS = ["geopotential_height", "temperature", "pressure", "humidity",
                  "wind_speed_x", "wind_speed_y", "wind_direction", "wind_speed"]

for file_name in file_names_list:
    txt_name = file_name
    print('提取' + txt_name)

    # Load the sheet, keep columns 6..28 and discard the unused ones.
    current_data = pd.read_excel(file_path_in + txt_name)
    current_data = current_data.iloc[:, 6:29].drop(columns=DROPPED_COLUMNS)

    # Characters 3..12 of the file name label the output file; the first four
    # of them select the per-year output folder.
    name = txt_name[3:13]
    year = name[0:4]

    # Order the rows by GPH, turn the missing-value sentinel into NaN and drop
    # rows that have no GPH.
    data0 = current_data.sort_values(by='GPH')
    data0 = data0.replace(MISSING_VALUE, np.nan)
    data0 = data0.dropna(subset=['GPH'])

    # A spline of degree k needs more than k points; skip profiles that are
    # empty or too short instead of aborting the whole batch inside splrep.
    if len(data0) <= SPLINE_DEGREE:
        print('skip ' + txt_name + ': fewer than ' + str(SPLINE_DEGREE + 1)
              + ' rows with a valid GPH')
        continue

    # Columns 7 and up of the remaining table; the code below expects GPH,
    # WIN_D and WIN_S to be among them.
    data1 = data0.iloc[:, 7:17].copy()

    # Rows with zero speed and zero direction carry no direction information:
    # blank the direction so it does not bias the wind components.
    calm = (data1['WIN_S'] == 0) & (data1['WIN_D'] == 0)
    direction_rad = np.radians(data1['WIN_D'].mask(calm))

    # Decompose the wind into components so it can be averaged as a vector.
    data1['wind_speed_x'] = data1['WIN_S'] * np.cos(direction_rad)
    data1['wind_speed_y'] = data1['WIN_S'] * np.sin(direction_rad)
    data1 = data1.drop(columns=["WIN_D", "WIN_S"])

    # Fill gaps by linear interpolation.  The result is assigned back:
    # ``data1[column].interpolate(inplace=True)`` acts on a temporary object
    # and does not update ``data1`` under pandas copy-on-write.
    for column in data1.columns:
        data1[column] = data1[column].interpolate(method='linear')

    data1[ROUNDED_COLUMNS] = data1[ROUNDED_COLUMNS].round(2)
    data1 = data1.reset_index(drop=True)

    # Move GPH to the first position; it is the grouping key further down.
    column_names = ['GPH'] + [col for col in data1.columns if col != 'GPH']
    data1 = data1[column_names]

    # Cubic-spline resampling of every column against the row number.
    # The evaluation grid ends at the last data point (row len-1); going up to
    # len(data1) would make splev extrapolate beyond the measured profile.
    last_row = len(data1) - 1
    X = np.arange(len(data1), dtype=float)
    x = np.linspace(0, last_row, last_row * SAMPLES_PER_LEVEL + 1)

    resampled = {}
    for column in column_names:
        spline = spi.splrep(X, data1[column].values, k=SPLINE_DEGREE)
        values = spi.splev(x, spline)
        # GPH is rounded to whole units so it can serve as a group key; all
        # other columns keep three decimals.
        resampled[column] = np.around(values, 0 if column == 'GPH' else 3)
    df1 = pd.DataFrame(resampled, columns=column_names)

    # Drop rows in which every value is zero.
    df1 = df1[(df1 != 0).any(axis=1)]

    # First pass: average all samples that share the same integer GPH.
    dfmean = df1.groupby(['GPH'])[column_names].mean()
    # Rename the index so it no longer clashes with the GPH column when
    # grouping by that column again.
    dfmean = dfmean.rename_axis('new_index')

    # Second pass: snap GPH (relative to HEIGHT_OFFSET) to the nearest
    # multiple of HEIGHT_BIN and average within each bin.
    dfmean['GPH'] = ((dfmean['GPH'] - HEIGHT_OFFSET) / HEIGHT_BIN).round() * HEIGHT_BIN
    datals = dfmean.groupby(['GPH'])[column_names].mean()

    # Recover direction (degrees, shifted into [0, 360)) and speed from the
    # averaged components.
    direction_deg = np.degrees(
        np.arctan2(datals['wind_speed_y'], datals['wind_speed_x'])
    )
    datals['avg_wind_direction'] = direction_deg.where(
        direction_deg >= 0, direction_deg + 360
    )
    datals['avg_wind_speed'] = np.sqrt(
        datals['wind_speed_x'] ** 2 + datals['wind_speed_y'] ** 2
    )

    # Make sure the per-year output folder exists before writing anything.
    out_dir = file_path_out + '\\5m\\' + year
    os.makedirs(out_dir, exist_ok=True)

    # Snapshot of the binned table with the original column names.
    # NOTE(review): this file is overwritten on every iteration, so after the
    # run it only holds the last processed profile -- confirm it is wanted.
    datals.to_csv('J:\\BJ TK\\5m.csv', index=False, header=True, encoding='utf-8')

    # Final table: descriptive column names, offset added back to the binned
    # GPH, everything rounded to two decimals.
    datals.columns = OUTPUT_COLUMNS
    datals["geopotential_height"] = datals["geopotential_height"] + HEIGHT_OFFSET
    datals = datals.round(2)
    datals.to_csv(out_dir + '\\' + name + '_5m.csv', index=False, header=True, encoding='utf-8')

